主要利用Pandas库和Seaborn库.
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
生成4组数据,转为DataFrame
数据类型
# Build four related series over the same x grid and collect them into a DataFrame.
xarray = np.linspace(0, 10, 100)  # 100 evenly spaced values from 0 to 10
yarray = xarray**3 + np.random.normal(0, 100, 100)  # y = x^3 + normal noise (std 100)
zarray = -100*xarray + np.random.normal(0, 10, 100)  # z = -100x + normal noise (std 10)
warray = 200*xarray**0.5 + np.random.normal(0, 10, 100)  # w = 200*sqrt(x) + normal noise (std 10)
# Every plotting call that follows reads `df`, so it must be created here.
df = pd.DataFrame({'x': xarray, 'y': yarray, 'z': zarray, 'w': warray})
df.head()  # first rows of the frame (rendered as a table in a notebook)
|   | x | y | z | w |
|---|---|---|---|---|
| 0 | 0 | 66.5297 | -7.81256 | 14.5319 |
| 1 | 0.10101 | -34.835 | -18.8105 | 65.9947 |
| 2 | 0.20202 | 37.5717 | -21.8944 | 96.7367 |
| 3 | 0.30303 | 140.38 | -28.7846 | 101.061 |
| 4 | 0.40404 | 202.198 | -47.9113 | 127.187 |
# Histograms of the columns of df, styled through a shared options dict.
hist_options = dict(bins=15, color='steelblue', edgecolor='black',
                    linewidth=1.0, xlabelsize=8, ylabelsize=8, grid=False)
df.hist(**hist_options)
# Kernel density estimate of column w.
w_column = df['w']
sns.kdeplot(w_column)
# Box plot built from the whole frame.
sns.boxplot(data=df)
小提琴图是使用核密度图显示分组数值数据的另一种有效方法(描绘了数据在不同值下的概率密度)。
# Violin plot built from the whole frame.
sns.violinplot(data=df)
# Annotated heatmap of the pairwise column correlations, rounded to two decimals.
corr_matrix = df.corr().round(2)
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap="coolwarm",
            linewidths=.05)
热力图中的梯度根据相关性的强度而变化,你可以很容易发现彼此之间具有强相关性的潜在属性.
# Grid of pairwise plots for the columns of df, with KDE curves on the diagonal.
sns.pairplot(df, diag_kind='kde')
# Joint KDE plot of column y against column x.
sns.jointplot(data=df, x='x', y='y', kind='kde')
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
# Four series sharing one x grid; the noise draws stay in y, z, w order.
n_points = 100
xarray = np.linspace(0, 10, n_points)  # evenly spaced values from 0 to 10
yarray = xarray ** 3 + np.random.normal(loc=0, scale=100, size=n_points)  # y = x^3 + noise
zarray = -100 * xarray + np.random.normal(loc=0, scale=10, size=n_points)  # z = -100x + noise
warray = 200 * xarray ** 0.5 + np.random.normal(loc=0, scale=10, size=n_points)  # w = 200*sqrt(x) + noise
# Collect the series as columns x, y, z, w.
columns = dict(x=xarray, y=yarray, z=zarray, w=warray)
df = pd.DataFrame(columns)
# Print the leading rows of df as a Markdown table.
preview = df.head()
print(preview.to_markdown())
|    |       x |        y |         z |        w |
|---:|--------:|---------:|----------:|---------:|
|  0 | 0       |  66.5297 |  -7.81256 |  14.5319 |
|  1 | 0.10101 | -34.835  | -18.8105  |  65.9947 |
|  2 | 0.20202 |  37.5717 | -21.8944  |  96.7367 |
|  3 | 0.30303 | 140.38   | -28.7846  | 101.061  |
|  4 | 0.40404 | 202.198  | -47.9113  | 127.187  |
# Draw each chart on its own figure and save it under images/.
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('images', exist_ok=True)

# Histograms of the columns of df; with no ax given, pandas opens a new figure.
df.hist(bins=15, color='steelblue', edgecolor='black', linewidth=1.0,
        xlabelsize=8, ylabelsize=8, grid=False)
plt.savefig('images/pad0301.png')

# kdeplot, boxplot, heatmap and violinplot draw on the current axes, so a
# fresh figure is opened before each one to keep earlier plots out of the
# saved image.
plt.figure()
sns.kdeplot(df['w'])
plt.savefig('images/pad0302.png')

plt.figure()
sns.boxplot(data=df)
plt.savefig('images/pad0303.png')

# Correlation matrix rounded to two decimals before being annotated.
plt.figure()
sns.heatmap(df.corr().round(2), annot=True, cmap="coolwarm", fmt='.2f',
            linewidths=.05)
plt.savefig('images/pad0304.png')

# pairplot and jointplot build their own figure, so no plt.figure() is needed.
sns.pairplot(data=df, diag_kind='kde')
plt.savefig('images/pad0305.png')

sns.jointplot(x='x', y='y', data=df, kind='kde')
plt.savefig('images/pad0306.png')

# Without a new figure this violin would be drawn onto the jointplot figure.
plt.figure()
sns.violinplot(x='y', data=df)
plt.savefig('images/pad0307.png')